package org.example;

/**
 * @author zhuyc
 * @version 1.0
 * @description TODO
 * @date 2022/6/9 10:01
 */

import org.example.dto.BookCatalogueDto;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;

import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

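/**
 * Downloads novels chapter by chapter with Jsoup, writes them as part files
 * under {@code /usr/local/webapps/file/downloading/} and merges the parts into
 * a single {@code .txt} file under {@code /usr/local/webapps/file/}.
 *
 * @author 三木猿
 * @version 1.0
 * @date 2020/8/10 15:16
 */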
public class DownloadBook {
	// Identifier of the site to download from. Written by the setDataSource overloads and
	// compared in execute against "biquge5200", "biquge", "xbiquke" and "ddyueshu".
	private static String dataSource;
	// Anchor-tag regex used by get() on a catalogue entry's markup: group 1 is the text
	// following href= (the link target), group 2 the link text up to, but excluding, </a>.
	private static Pattern pattern=Pattern.compile("<a\\s*href=\"?([\\w\\W]*?)\"?[\\s]*?[^>]>([\\s\\S]*?)(?=</a>)");


	/**
	 * Selects the site that subsequent downloads are dispatched to.
	 * <p>
	 * Calls {@code SSLHelper.init()} first (presumably to prepare SSL/TLS handling for the
	 * HTTPS requests — confirm against SSLHelper) and then stores the identifier in the
	 * static {@code dataSource} field.
	 *
	 * @param dataSource site identifier, e.g. "biquge5200", "biquge", "xbiquke" or "ddyueshu"
	 */
	public static void setDataSource(String dataSource) {
		SSLHelper.init();
		DownloadBook.dataSource=dataSource;
	}

	/**
	 * Selects the data source and, for "biquge5200" and "biquge", bulk-downloads the books
	 * whose codes are {@code 0_1} up to (but excluding) {@code 0_count}.
	 * <p>
	 * The id range is split in two halves that are crawled by two threads; the call blocks
	 * until both have finished. Any other data source is only stored, nothing is downloaded.
	 * <p>
	 * Differences from the previous implementation: the second biquge5200 worker no longer
	 * skips every other book, a failing book no longer stops a worker (it is reported and the
	 * next id is tried, as the biquge workers already did), and an interrupt of the calling
	 * thread no longer restarts the whole crawl.
	 *
	 * @param dataSource site identifier
	 * @param count      exclusive upper bound of the numeric book ids to crawl
	 */
	public static void setDataSource(String dataSource, int count) {
		SSLHelper.init();
		DownloadBook.dataSource=dataSource;
		final String baseUrl;
		if ("biquge5200".equals(dataSource)) {
			baseUrl="https://www.biquge5200.com/";
		} else if ("biquge".equals(dataSource)) {
			baseUrl="https://www.biquge.com/";
		} else {
			// No bulk crawl is defined for the remaining data sources.
			return;
		}
		// Ids start at 1, so the split point must never fall below 1.
		final int middle=Math.max(1, count / 2);
		Thread lowerHalf=new Thread(() -> crawlBookRange(baseUrl, 1, middle));
		Thread upperHalf=new Thread(() -> crawlBookRange(baseUrl, middle, count));
		lowerHalf.start();
		upperHalf.start();
		try {
			lowerHalf.join();
			upperHalf.join();
		} catch (InterruptedException e) {
			// Keep the interrupt visible to the caller instead of looping and re-spawning workers.
			Thread.currentThread().interrupt();
		}
	}

	/**
	 * Downloads every book with id in {@code [fromInclusive, toExclusive)} from the given site,
	 * skipping books whose merged file already exists and books that fail to load.
	 */
	private static void crawlBookRange(String baseUrl, int fromInclusive, int toExclusive) {
		for (int id=fromInclusive; id < toExclusive; id++) {
			String bookCod="0_" + id;
			try {
				Document document=Jsoup.connect(baseUrl + bookCod + "/").get();
				Element info=document.getElementById("info");
				if (info == null) {
					System.err.println("No #info element for book " + bookCod + ", skipping");
					continue;
				}
				// NOTE(review): bookName comes from the remote page and is used verbatim in a file path.
				String bookName=info.select("h1").text();
				File file=new File("/usr/local/webapps/file/" + bookName + ".txt");
				if (file.exists()) {
					continue;
				}
				System.out.println("---------------" + bookName + "正在下载" + "--------------");
				List<BookCatalogueDto> bookCatalogue=getBookCatalogue(bookCod, document, pattern);
				downloadBook(bookCod, bookName, bookCatalogue);
				System.out.println("---------------" + bookName + "下载完成" + "--------------");
			} catch (InterruptedException e) {
				Thread.currentThread().interrupt();
				return;
			} catch (Exception e) {
				// Best effort: report the failure and move on to the next book.
				System.err.println("Skipping book " + bookCod + ": " + e);
			}
		}
	}

	/**
	 * Downloads one book unless its merged file already exists.
	 * <p>
	 * The catalogue is cut into three slices with {@code splitList}; each slice is handed to
	 * {@code execute} on its own thread with sequence numbers 1 to 3. Once all three threads
	 * have finished, {@code combine} merges the part files and the elapsed time is printed.
	 *
	 * @param bookCod          site code of the book
	 * @param bookName         title of the book, used to build the file names
	 * @param bookCatalogueDto full chapter list of the book
	 * @throws Exception if waiting for a worker is interrupted or merging fails
	 */
	public static void downloadBook(String bookCod, String bookName, List<BookCatalogueDto> bookCatalogueDto) throws Exception {
		File finished=new File("/usr/local/webapps/file/" + bookName + ".txt");
		if (finished.exists()) {
			return;
		}
		Map<Integer, List<BookCatalogueDto>> slices=splitList(bookCatalogueDto, 3);
		long startedAt=System.currentTimeMillis();
		Thread[] workers=new Thread[3];
		for (int slot=0; slot < workers.length; slot++) {
			final int part=slot;
			workers[slot]=new Thread(() -> {
				try {
					execute(bookCod, bookName, part + 1, slices.get(part));
				} catch (Exception e) {
					e.printStackTrace();
				}
			});
		}
		for (Thread worker : workers) {
			worker.start();
		}
		for (Thread worker : workers) {
			worker.join();
		}
		// Merge the part files into the final book file.
		combine(bookName);
		long finishedAt=System.currentTimeMillis();
		System.out.println("本次下载共用时" + (finishedAt - startedAt));
	}

	/**
	 * Downloads one slice of a book into the part file named {@code bookName + sequence},
	 * choosing the downloader from the configured data source: "biquge5200" uses
	 * {@code biquge5200}; "biquge", "xbiquke" and "ddyueshu" all use {@code biquge}.
	 * Any other value does nothing. Exceptions are printed, never propagated.
	 *
	 * @param bookCod              site code of the book
	 * @param bookName             title of the book
	 * @param sequence             number of the slice, appended to the part file name
	 * @param bookCatalogueDtoList chapters belonging to this slice
	 */
	public static void execute(String bookCod, String bookName, int sequence, List<BookCatalogueDto> bookCatalogueDtoList) {
		final String partName=bookName + sequence;
		try {
			if ("biquge5200".equals(dataSource)) {
				biquge5200(bookCod, partName, bookCatalogueDtoList);
				return;
			}
			boolean usesBiqugeDownloader="biquge".equals(dataSource)
					|| "xbiquke".equals(dataSource)
					|| "ddyueshu".equals(dataSource);
			if (usesBiqugeDownloader) {
				biquge(bookCod, partName, bookCatalogueDtoList);
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Downloads the given chapters and appends them to the part file
	 * {@code /usr/local/webapps/file/downloading/<bookName>.txt}.
	 * <p>
	 * If the part file already exists, the last chapter heading found in it by
	 * {@code txtCatalogue} is looked up in the chapter list and only the chapters after it are
	 * downloaded. Each chapter page is fetched from {@code https://www.ddyueshu.com/}; the
	 * {@code h1} text is written as a heading followed by every non-blank text node of
	 * {@code #content}, one per line.
	 * <p>
	 * A chapter that cannot be fetched after one retry is reported and skipped. The writer is
	 * always closed, including on the early return and on errors.
	 * NOTE(review): the file is written with the platform default charset while
	 * {@code txtCatalogue} reads it as utf-8 — confirm they agree on the target system.
	 *
	 * @param bookCod          site code of the book, used in the chapter URL
	 * @param bookName         name of the part file (without extension)
	 * @param bookCatalogueDto chapters to download
	 * @throws Exception if the part file cannot be opened or written
	 */
	public static void biquge5200(String bookCod, String bookName, List<BookCatalogueDto> bookCatalogueDto) throws
			Exception {
		File file=new File("/usr/local/webapps/file/downloading/" + bookName + ".txt");
		List<BookCatalogueDto> pending=bookCatalogueDto;
		if (!file.exists()) {
			file.getParentFile().mkdirs();
			try {
				file.createNewFile();
			} catch (IOException e) {
				e.printStackTrace();
			}
		} else {
			// Resume: drop everything up to and including the last chapter already on disk.
			List<BookCatalogueDto> saved=txtCatalogue(bookName);
			if (!saved.isEmpty()) {
				String lastSaved=saved.get(saved.size() - 1).getCatalogueName();
				for (int i=0; i < bookCatalogueDto.size(); i++) {
					if (bookCatalogueDto.get(i).getCatalogueName().equals(lastSaved)) {
						pending=bookCatalogueDto.subList(i + 1, bookCatalogueDto.size());
						break;
					}
				}
			}
		}
		if (pending.isEmpty()) {
			return;
		}

		// Append the crawled chapters to the part file on disk.
		try (BufferedWriter bw=new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file, true)))) {
			for (BookCatalogueDto chapter : pending) {
				String url="https://www.ddyueshu.com/" + bookCod + "/" + chapter.getCatalogueCod() + ".html";
				Document document=fetchBiquge5200Chapter(url);
				if (document == null) {
					if (Thread.currentThread().isInterrupted()) {
						break;
					}
					System.err.println("Skipping chapter after failed retry: " + url);
					continue;
				}
				bw.write(document.select("h1").text());
				bw.newLine();
				for (TextNode node : document.select("#content").textNodes()) {
					String text=node.text();
					if (text == null || text.trim().length() == 0) {
						continue;
					}
					bw.write(text);
					bw.newLine();
				}
				// Flush per chapter so a later resume sees every completed chapter.
				bw.flush();
			}
		}
	}

	/**
	 * Fetches a chapter page, retrying once after five seconds; returns {@code null} when
	 * both attempts fail or the wait is interrupted (the interrupt flag is then restored).
	 */
	private static Document fetchBiquge5200Chapter(String url) {
		try {
			return Jsoup.connect(url).get();
		} catch (IOException firstFailure) {
			try {
				Thread.sleep(5000);
				return Jsoup.connect(url).get();
			} catch (InterruptedException interrupted) {
				Thread.currentThread().interrupt();
				return null;
			} catch (IOException secondFailure) {
				secondFailure.printStackTrace();
				return null;
			}
		}
	}

	/**
	 * Builds the chapter list of a book from its catalogue page.
	 * <p>
	 * All {@code dd} elements after the first six are cut into three slices with
	 * {@code splitList}; each slice is converted by {@code get} on its own thread and the
	 * three results are concatenated in slice order.
	 *
	 * @param bookCod  site code of the book
	 * @param document parsed catalogue page
	 * @param pattern  regex passed through to {@code get}
	 * @return the catalogue entries in page order
	 * @throws InterruptedException if waiting for a parser thread is interrupted
	 */
	public static List<BookCatalogueDto> getBookCatalogue(String bookCod, Document document, Pattern pattern) throws InterruptedException {
		Elements ddTags=document.getElementsByTag("dd");
		Map<Integer, List<Element>> slices=splitList(ddTags.subList(6, ddTags.size()), 3);
		final List<BookCatalogueDto>[] parsed=new List[3];
		Thread[] parsers=new Thread[parsed.length];
		for (int slot=0; slot < parsed.length; slot++) {
			final int part=slot;
			// Start with an empty list so a parser that dies still leaves a usable slot.
			parsed[slot]=new ArrayList<>();
			parsers[slot]=new Thread(() -> {
				parsed[part]=get(slices.get(part), bookCod, document, pattern);
			});
		}
		for (Thread parser : parsers) {
			parser.start();
		}
		for (Thread parser : parsers) {
			parser.join();
		}
		List<BookCatalogueDto> merged=new ArrayList<>();
		for (List<BookCatalogueDto> piece : parsed) {
			merged.addAll(piece);
		}
		return merged;
	}

	/**
	 * Converts catalogue elements into {@code BookCatalogueDto} entries.
	 * <p>
	 * For each element the last child node with non-empty markup is matched against
	 * {@code pattern}; when it matches, the part of group 1 between the last {@code /} and the
	 * last {@code .} is parsed as the chapter code. The element text becomes the chapter name.
	 *
	 * @param dd       catalogue elements to convert
	 * @param bookCod  site code stored on every entry
	 * @param document not used by this method
	 * @param pattern  regex whose group 1 holds the chapter link
	 * @return one entry per element, in input order
	 */
	public static List<BookCatalogueDto> get(List<Element> dd, String bookCod, Document document, Pattern pattern) {
		List<BookCatalogueDto> entries=new ArrayList<>();
		for (Element item : dd) {
			BookCatalogueDto entry=new BookCatalogueDto();
			// Default to the first child, then prefer the last child that renders to something.
			Node chosen=item.childNode(0);
			for (Node child : item.childNodes()) {
				if ("".equals(child.toString())) {
					continue;
				}
				chosen=child;
			}
			Matcher hrefMatcher=pattern.matcher(chosen.toString());
			if (hrefMatcher.find()) {
				String href=hrefMatcher.group(1);
				int codeStart=href.lastIndexOf("/") + 1;
				int codeEnd=href.lastIndexOf(".");
				entry.setCatalogueCod(Integer.parseInt(href.substring(codeStart, codeEnd)));
			}
			entry.setBookCod(bookCod);
			entry.setCatalogueName(item.text());
			entries.add(entry);
		}
		return entries;
	}

	/**
	 * Downloads the given chapters and appends them to the part file
	 * {@code /usr/local/webapps/file/downloading/<bookName>.txt}.
	 * <p>
	 * If the part file already exists, the last chapter heading found in it by
	 * {@code txtCatalogue} is looked up in the chapter list and only the chapters after it are
	 * downloaded. Each chapter page is fetched from {@code https://www.ddyueshu.com/} using
	 * the book code stored on the chapter entry; the {@code h1} text is written as a heading
	 * followed by every non-blank text node of {@code #content}, one per line.
	 * <p>
	 * A chapter that cannot be fetched after one retry is reported and skipped instead of
	 * aborting the whole part with a NullPointerException. The writer is always closed.
	 * NOTE(review): the file is written with the platform default charset while
	 * {@code txtCatalogue} reads it as utf-8 — confirm they agree on the target system.
	 *
	 * @param bookCod          site code of the book (the URL uses the code on each entry)
	 * @param bookName         name of the part file (without extension)
	 * @param bookCatalogueDto chapters to download
	 * @throws FileNotFoundException if the part file cannot be opened for writing
	 */
	private static void biquge(String bookCod, String bookName, List<BookCatalogueDto> bookCatalogueDto) throws FileNotFoundException {
		File file=new File("/usr/local/webapps/file/downloading/" + bookName + ".txt");
		List<BookCatalogueDto> pending=bookCatalogueDto;
		if (!file.exists()) {
			file.getParentFile().mkdirs();
			try {
				file.createNewFile();
			} catch (IOException e) {
				e.printStackTrace();
			}
		} else {
			// Resume: drop everything up to and including the last chapter already on disk.
			List<BookCatalogueDto> saved=txtCatalogue(bookName);
			if (!saved.isEmpty()) {
				String lastSaved=saved.get(saved.size() - 1).getCatalogueName();
				for (int i=0; i < bookCatalogueDto.size(); i++) {
					if (bookCatalogueDto.get(i).getCatalogueName().equals(lastSaved)) {
						pending=bookCatalogueDto.subList(i + 1, bookCatalogueDto.size());
						break;
					}
				}
			}
		}
		if (pending.isEmpty()) {
			return;
		}
		// Append the crawled chapters to the part file on disk.
		try (BufferedWriter bw=new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file, true)))) {
			for (BookCatalogueDto chapter : pending) {
				String url="https://www.ddyueshu.com/" + chapter.getBookCod() + "/" + chapter.getCatalogueCod() + ".html";
				Document document=fetchBiqugeChapter(url);
				if (document == null) {
					if (Thread.currentThread().isInterrupted()) {
						break;
					}
					System.err.println("Skipping chapter after failed retry: " + url);
					continue;
				}
				bw.write(document.select("h1").text());
				bw.newLine();
				for (TextNode node : document.select("#content").textNodes()) {
					String text=node.text();
					if (text == null || text.trim().length() == 0) {
						continue;
					}
					bw.write(text);
					bw.newLine();
				}
				// Flush per chapter so a later resume sees every completed chapter.
				bw.flush();
			}
		} catch (FileNotFoundException e) {
			// Opening the part file failed: keep the declared contract.
			throw e;
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Fetches a chapter page, retrying once after five seconds; returns {@code null} when
	 * both attempts fail or the wait is interrupted (the interrupt flag is then restored).
	 */
	private static Document fetchBiqugeChapter(String url) {
		try {
			return Jsoup.connect(url).get();
		} catch (Exception firstFailure) {
			try {
				Thread.sleep(5000);
				return Jsoup.connect(url).get();
			} catch (InterruptedException interrupted) {
				Thread.currentThread().interrupt();
				return null;
			} catch (Exception secondFailure) {
				secondFailure.printStackTrace();
				return null;
			}
		}
	}

	/**
	 * Cuts a list into {@code num} consecutive slices keyed {@code 0 .. num-1}.
	 * <p>
	 * Every slice holds {@code t.size() / num} elements except the last one, which also
	 * receives the remainder. The slices are {@code subList} views of {@code t}.
	 *
	 * @param t   list to cut
	 * @param num number of slices
	 * @param <T> element type
	 * @return map from slice index to slice
	 */
	public static <T> Map<Integer, List<T>> splitList(List<T> t, int num) {
		final int total=t.size();
		final int chunk=total / num;
		Map<Integer, List<T>> parts=new HashMap<>();
		for (int index=0; index < num; index++) {
			int from=index * chunk;
			// The final slice runs to the end of the list to pick up the remainder.
			int to=(index == num - 1) ? total : from + chunk;
			parts.put(index, t.subList(from, to));
		}
		return parts;
	}

	/**
	 * Appends the part files {@code <bookName>1.txt} to {@code <bookName>3.txt} from
	 * {@code /usr/local/webapps/file/downloading/}, in that order, to
	 * {@code /usr/local/webapps/file/<bookName>.txt} and deletes each part afterwards.
	 * Missing parts are skipped. Reader and writer are closed even when copying fails.
	 * NOTE(review): both sides use the platform default charset — confirm it matches the
	 * encoding the part files were written with.
	 *
	 * @param bookName title of the book, used to build all file names
	 * @throws Exception if the target cannot be opened or copying fails
	 */
	public static void combine(String bookName) throws Exception {
		File target=new File("/usr/local/webapps/file/" + bookName + ".txt");
		try (BufferedWriter bw=new BufferedWriter(new OutputStreamWriter(new FileOutputStream(target, true)))) {
			for (int i=1; i < 4; i++) {
				File part=new File("/usr/local/webapps/file/downloading/" + bookName + i + ".txt");
				if (!part.exists()) {
					continue;
				}
				try (BufferedReader br=new BufferedReader(new FileReader(part))) {
					String line;
					while ((line=br.readLine()) != null) {
						bw.write(line);
						bw.newLine();
					}
				}
				// The reader is closed at this point, so the delete cannot be blocked by it.
				if (!part.delete()) {
					System.err.println("Could not delete part file " + part.getPath());
				}
			}
		}
	}

	// Chapter heading: optional leading whitespace, "第", 1-9 characters, a unit character,
	// exactly one whitespace character, then the rest of the line.
	private static final Pattern CHAPTER_TITLE_PATTERN=Pattern.compile("(^\\s*第)(.{1,9})[章节卷集部篇回](\\s{1})(.*)($\\s*)");

	/**
	 * Lists the chapter headings already present in the part file
	 * {@code /usr/local/webapps/file/downloading/<bookName>.txt}, read as utf-8.
	 * <p>
	 * Every line that matches the chapter heading pattern yields one entry whose catalogue
	 * name is the trimmed line and whose book name is {@code bookName}. The previous
	 * implementation carried a toggle flag across lines and therefore recorded only every
	 * second heading; all headings are now recorded. A missing file or a read error is
	 * reported on standard output and whatever was collected so far is returned.
	 *
	 * @param bookName name of the part file (without extension)
	 * @return the headings found, in file order; empty if the file is missing
	 */
	public static List<BookCatalogueDto> txtCatalogue(String bookName) {
		List<BookCatalogueDto> bookCatalogueDtos=new ArrayList<>();
		File file=new File("/usr/local/webapps/file/downloading/" + bookName + ".txt");
		if (!file.isFile()) {
			System.out.println("找不到指定的文件");
			return bookCatalogueDtos;
		}
		try (BufferedReader bufferedReader=new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8"))) {
			String lineTxt;
			while ((lineTxt=bufferedReader.readLine()) != null) {
				Matcher matcher=CHAPTER_TITLE_PATTERN.matcher(lineTxt);
				// The pattern is anchored to the start of the line, so one find per line suffices.
				if (!matcher.find()) {
					continue;
				}
				BookCatalogueDto content=new BookCatalogueDto();
				content.setBookName(bookName);
				content.setCatalogueName(matcher.group().trim());
				bookCatalogueDtos.add(content);
			}
		} catch (Exception e) {
			System.out.println("读取文件内容出错");
			e.printStackTrace();
		}
		return bookCatalogueDtos;
	}
}
